###########################################################################
###### REPLICATION FILE: POLICY FEED-IN AND INTERDEPENDENCE ##############
###########################################################################

rm(list=ls())

#load packages
library(rio)
library(dplyr)
library(tidyverse)
library(stargazer)
library(plm)
library(lme4)
library(viridis)

#set wd
setwd("C:/Users/samtr/Dropbox/Dissertation/Climate/dg_cross_state/perspectives/production/replication_files")
#need to change this for your machine

#import state-year level data
dat <- import("state_year.xlsx")

#import state-year-tpo level data
tpo.dat <- import("state_tpo_year.xlsx")

##### figure 1 #####
fin.year <- 2018
ggplot(data = dat[dat$state != "DC" & dat$year<=2018,], aes(x = year)) +
  geom_line(aes(color = state, y = percent_rooftop), show_guide = F) + 
  theme_bw()+
  geom_text(data = subset(dat[dat$state != "DC",], year==fin.year), aes(label = state, y = percent_rooftop), size=3, position = position_jitter(), check_overlap=T)+
  labs(x = "", y = "Percent", fill = "")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))+
  scale_x_continuous(breaks = scales::pretty_breaks(n = 5)) +
  theme(legend.position="bottom")+
  annotate("text", x=2017.5, y=-.25, label= "(other states)") + 
  theme(text = element_text(size=14))+
  scale_color_viridis(discrete=T)+
  theme(legend.title=element_blank())

##### table 1 #####
test.dat <- subset(dat, year <= 2017 & year >= 2011) #2010 is first year of data; so for changes need to start in 2011. FTG series last updated in 2017.

#add scalar of 5 to reduce loss of sample from y-o-y declines
test.dat$roof_solar_mw_chadj <- test.dat$roof_solar_mw_ch + 5
test.dat$roof_solar_mw_chadj <- ifelse(test.dat$roof_solar_mw_chadj < 0, NA, test.dat$roof_solar_mw_chadj) 

#robustness check NA for all negative
test.dat$roof_solar_mw_chadj2 <- ifelse(test.dat$roof_solar_mw_ch < 0, NA, test.dat$roof_solar_mw_ch) 

#gdp adjusted by scalar. 
test.dat$gdp_per_adj <- .001*test.dat$gdp_per

### main models ###
#fe model
test.dat <- test.dat %>% ungroup()
fe.reg = plm(log(roof_solar_mw_chadj) ~ ftg, data = test.dat, effect = "twoways", index = c("state","year"), model = "within")
summary(fe.reg)

#multi-level model
mlm = lmer(log(roof_solar_mw_chadj)
           #year state varying   
           ~  ftg + solar_potential_adj + gdp_per_adj + total_price + 
             + (1|state) + (1|year), #state and year FE 
           data = test.dat) 

#corresponding regression table 
names = c("Freeing the Grid Score", "Solar Potential", "GDP Per Capita", "Electricity Price")
stargazer(fe.reg, mlm,
          no.space = T, title = "DG Policy and Solar Growth",
          digits = 2,
          covariate.labels=names,
          dep.var.caption  = "Distributed solar growth (logged MW)",
          dep.var.labels = c("blah", "blah", "blah"),
          dep.var.labels.include = T,
          keep.stat="n",
          label = "dg_policy_effects",
          omit = c("time","Constant")          
)

#calculating effect size
log_effect <- function(coef) {
  (exp(coef) - 1)*100
}
log_effect(fe.reg$coef)


##### figure 2 #####
ggplot(data = dat[dat$year<2017,], aes(x = year)) +
  geom_bar(aes(y = roof_solar_mw), stat = "identity") + 
  geom_line(aes(y = tot_dg_lobby_sum/.01), linetype = 2)+
  scale_y_continuous(sec.axis = sec_axis(~.*.01, name = "State-level lobbying registrations"))+
  theme_bw()+
  labs(y = "MW installed", x = "", fill = "")+
  theme(axis.text.x = element_text(size  = 12, angle = 45, hjust = 1, vjust = 1))+
  theme(axis.text.y = element_text(size  = 12))+
  scale_x_continuous(breaks = scales::pretty_breaks(n = 8)) +
  theme(legend.position="bottom")+
  theme( axis.title.y = element_text(size = 12, angle = 90))+
  annotate("text", x=2015.8, y=15000, label= "MW") + 
  annotate("text", x=2014.3, y=30000, label= "Lobbying registrations") + 
  theme(legend.title=element_blank())


##### table 2 #####

#variable for total installs by tpo in a year across the states
tpo.dat$time = tpo.dat$year - 2010

#top coded at 10
tpo.dat$num_lobbyists_cap = ifelse(tpo.dat$num_lobbyists<10, tpo.dat$num_lobbyists, 10)

#correlation between # and spend
cor(tpo.dat$num_lobbyists, log(tpo.dat$lobby_dollars+1),use="complete.obs") #logged
cor(tpo.dat$num_lobbyists, tpo.dat$lobby_dollars,use="complete.obs")

#vbl for total tpo in a year (across states) for both data sets 
tpo.dat = tpo.dat %>%
  dplyr::group_by(year, tpo) %>%
  dplyr::mutate(total_year_tpo = sum(total_tpo))

#subtracting within state TPO for "other-state TPO" 
tpo.dat$total_year_tpo2 = tpo.dat$total_year_tpo - tpo.dat$total_tpo

#taking logs
tpo.dat$log_total_tpo = log(tpo.dat$total_tpo+1)
tpo.dat$log_total_year_tpo = log(tpo.dat$total_year_tpo+1)
tpo.dat$log_total_year_tpo2 = log(tpo.dat$total_year_tpo2+1)

#binary measure 
tpo.dat$binary_num = tpo.dat$num_lobbyists > 0

#binary firm presence 
mlm = lmer(binary_num
           ~  log_total_tpo + log_total_year_tpo2 + time #+ time
           + (1|state) + (1|year) + (1|tpo), 
           data = tpo.dat)


#basic glm number of lobbyists negative binomial
mlm.nb = glmer.nb(num_lobbyists 
                  ~  log_total_tpo + log_total_year_tpo2 + time
                  + (1|state) + (1|year) + (1|tpo), #state and year FE 
                  data = tpo.dat, verbose = T) 

#dollar amounts
mlm.dollars = lmer(log(lobby_dollars+1) 
                   #year state varying   
                   ~  log_total_tpo + log_total_year_tpo2 + time
                   + (1|state) + (1|year) + (1|tpo), #state and year FE 
                   data = tpo.dat) 

#comparing variance and mean
var(tpo.dat$num_lobbyists,na.rm=T)
mean(tpo.dat$num_lobbyists,na.rm=T)

var(tpo.dat$num_lobbyists_cap,na.rm=T)
mean(tpo.dat$num_lobbyists_cap,na.rm=T)

#robustness capped amount
mlm.cap = glmer(num_lobbyists_cap 
                ~  log_total_tpo + log_total_year_tpo2 + time
                + (1|state) + (1|year) + (1|tpo), 
                data = tpo.dat, family = "poisson") 

names = c("Within-State MW", "Other-State MW", "Time Trend")
stargazer(mlm, mlm.nb, mlm.dollars,
          no.space = T, title = "TPO MW Installed and Lobbying",
          digits = 2,
          covariate.labels=names,
          dep.var.caption  = "TPO MW Installed and Lobbying",
          dep.var.labels.include = FALSE,
          keep.stat="n",
          label = "dg_reg_tpo_feedback",
          omit = c("time", "Constant")
)


##### table 4 #####
dat <- dat %>% ungroup()
dat$tot_companies_lobby = with(dat,
                               (num_Sunrun > 0) + (num_Solarcity > 0) + (num_Vivint > 0) + (num_Sunpower > 0) + (num_Sunedison > 0))

#mean center for interaction
dat$log_roof_solar_mw_mc <- log(dat$roof_solar_mw+1) - mean(log(dat$roof_solar_mw+1))
dat$tot_companies_lobby_mc <- dat$tot_companies_lobby - mean(dat$tot_companies_lobby,na.rm=T)

#regression
fe.reg1 = plm(ftg_lead ~ tot_companies_lobby_mc, data = subset(dat,year<2017), effect = "twoways", index = c("state","year"), model = "within")
summary(fe.reg1)
fe.reg2 = plm(ftg_lead ~ tot_companies_lobby_mc + log_roof_solar_mw_mc, data = subset(dat,year<2017), effect = "twoways", index = c("state","year"), model = "within")
summary(fe.reg2)
fe.reg.int = plm(ftg_lead ~ tot_companies_lobby_mc + log_roof_solar_mw_mc + tot_companies_lobby_mc*log_roof_solar_mw_mc, data = subset(dat,year<2017), effect = "twoways", index = c("state","year"), model = "within")
summary(fe.reg.int)

names = c("Number firms lobbying", "Distributed solar capacity \n (logged MW)", "Number firms lobbying * distributed solar capacity")
stargazer(fe.reg1, fe.reg2, fe.reg.int,
          no.space = T, title = "",
          digits = 2,
          covariate.labels=names,
          dep.var.caption  = "State policy score (1-5)",
          dep.var.labels.include = FALSE,
          keep.stat="n",
          label = "dg_reg_tpo_feedback",
          omit = c("time", "Constant")
)

